/* * Copyright (C) 2014 The Calrissian Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.calrissian.flowmix.api.builder; import backtype.storm.topology.BoltDeclarer; import backtype.storm.topology.IComponent; import backtype.storm.topology.IRichBolt; import backtype.storm.topology.IRichSpout; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.TopologyBuilder; import backtype.storm.tuple.Fields; import org.calrissian.flowmix.api.storm.bolt.EventsLoaderBaseBolt; import org.calrissian.flowmix.api.storm.bolt.FlowLoaderBaseBolt; import org.calrissian.flowmix.api.storm.spout.EventsLoaderBaseSpout; import org.calrissian.flowmix.api.storm.spout.FlowLoaderBaseSpout; import org.calrissian.flowmix.core.storm.bolt.AggregatorBolt; import org.calrissian.flowmix.core.storm.bolt.EachBolt; import org.calrissian.flowmix.core.storm.bolt.FilterBolt; import org.calrissian.flowmix.core.storm.bolt.FlowInitializerBolt; import org.calrissian.flowmix.core.storm.bolt.JoinBolt; import org.calrissian.flowmix.core.storm.bolt.PartitionBolt; import org.calrissian.flowmix.core.storm.bolt.SelectorBolt; import org.calrissian.flowmix.core.storm.bolt.SortBolt; import org.calrissian.flowmix.core.storm.bolt.SplitBolt; import org.calrissian.flowmix.core.storm.bolt.SwitchBolt; import org.calrissian.flowmix.core.storm.spout.TickSpout; import static org.calrissian.flowmix.core.Constants.BROADCAST_STREAM; import static org.calrissian.flowmix.core.Constants.EVENT; import static org.calrissian.flowmix.core.Constants.FLOW_ID; import static org.calrissian.flowmix.core.Constants.FLOW_LOADER_STREAM; import static org.calrissian.flowmix.core.Constants.FLOW_OP_IDX; import static org.calrissian.flowmix.core.Constants.INITIALIZER; import static org.calrissian.flowmix.core.Constants.LAST_STREAM; import static org.calrissian.flowmix.core.Constants.OUTPUT; import static org.calrissian.flowmix.core.Constants.STREAM_NAME; import static org.calrissian.flowmix.core.model.op.AggregateOp.AGGREGATE; import static org.calrissian.flowmix.core.model.op.EachOp.EACH; import static org.calrissian.flowmix.core.model.op.FilterOp.FILTER; import static org.calrissian.flowmix.core.model.op.JoinOp.JOIN; import static org.calrissian.flowmix.core.model.op.PartitionOp.PARTITION; import static org.calrissian.flowmix.core.model.op.SelectOp.SELECT; import static org.calrissian.flowmix.core.model.op.SortOp.SORT; import static org.calrissian.flowmix.core.model.op.SplitOp.SPLIT; import static org.calrissian.flowmix.core.model.op.SwitchOp.SWITCH; /** * Builds the base flowmix topology configuration. The topology builder is returned so that it can be further * customized. Most often, it will be useful to further provision a downstream bolt that will process the data * even after the output. The output stream and component id provisioned on the output of the builder are both * "output". */ public class FlowmixBuilder { private IComponent flowLoaderSpout; private IComponent eventsComponent; private IRichBolt outputBolt; private int parallelismHint = 1; private int eventLoaderParallelism = -1; /** * @param flowLoader A spout that feeds rules into flowmix. This just needs to emit a Collection<Flow> in each tuple * at index 0 with a field name of "flows". * @param eventsSpout A spout that provides the events to std input. * @param outputBolt A bolt to accept the output events (with the field name "event") * @param parallelismHint The number of executors to run the parallel streams. */ public FlowmixBuilder setFlowLoader(FlowLoaderBaseSpout flowLoader) { this.flowLoaderSpout = flowLoader; return this; } public FlowmixBuilder setFlowLoader(FlowLoaderBaseBolt flowLoader) { this.flowLoaderSpout = flowLoader; return this; } public FlowmixBuilder setEventsLoader(EventsLoaderBaseBolt eventsLoader) { this.eventsComponent = eventsLoader; return this; } public FlowmixBuilder setEventsLoader(EventsLoaderBaseSpout eventsLoader) { this.eventsComponent = eventsLoader; return this; } public FlowmixBuilder setEventLoaderParallelism(int eventLoaderParallelism) { this.eventLoaderParallelism = eventLoaderParallelism; return this; } public FlowmixBuilder setOutputBolt(IRichBolt outputBolt) { this.outputBolt = outputBolt; return this; } public FlowmixBuilder setParallelismHint(int parallelismHint) { this.parallelismHint = parallelismHint; return this; } private void validateOptions() { String errorPrefix = "Error constructing Flowmix: "; if(flowLoaderSpout == null) throw new RuntimeException(errorPrefix + "A flow loader component needs to be set."); else if(eventsComponent == null) throw new RuntimeException(errorPrefix + "An event loader component needs to be set."); else if(outputBolt == null) throw new RuntimeException(errorPrefix + "An output bolt needs to be set."); } /** * @return A topology builder than can further be customized. */ public TopologyBuilder create() { TopologyBuilder builder = new TopologyBuilder(); if(eventsComponent instanceof IRichSpout) builder.setSpout(EVENT, (IRichSpout) eventsComponent, eventLoaderParallelism == -1 ? parallelismHint : eventLoaderParallelism); else if(eventsComponent instanceof IRichBolt) builder.setBolt(EVENT, (IRichBolt) eventsComponent, eventLoaderParallelism == -1 ? parallelismHint : eventLoaderParallelism); else throw new RuntimeException("The component for events is not valid. Must be IRichSpout or IRichBolt"); if(flowLoaderSpout instanceof IRichSpout) builder.setSpout(FLOW_LOADER_STREAM, (IRichSpout) flowLoaderSpout, 1); else if(flowLoaderSpout instanceof IRichBolt) builder.setBolt(FLOW_LOADER_STREAM, (IRichBolt) flowLoaderSpout, 1); else throw new RuntimeException("The component for rules is not valid. Must be IRichSpout or IRichBolt"); builder.setSpout("tick", new TickSpout(1000), 1); builder.setBolt(INITIALIZER, new FlowInitializerBolt(), parallelismHint) // kicks off a flow determining where to start .localOrShuffleGrouping(EVENT) .allGrouping(FLOW_LOADER_STREAM, FLOW_LOADER_STREAM); declarebolt(builder, FILTER, new FilterBolt(), parallelismHint, true); declarebolt(builder, SELECT, new SelectorBolt(), parallelismHint, true); declarebolt(builder, PARTITION, new PartitionBolt(), parallelismHint, true); declarebolt(builder, SWITCH, new SwitchBolt(), parallelismHint, true); declarebolt(builder, AGGREGATE, new AggregatorBolt(), parallelismHint, true); declarebolt(builder, JOIN, new JoinBolt(), parallelismHint, true); declarebolt(builder, EACH, new EachBolt(), parallelismHint, true); declarebolt(builder, SORT, new SortBolt(), parallelismHint, true); declarebolt(builder, SPLIT, new SplitBolt(), parallelismHint, true); declarebolt(builder, OUTPUT, outputBolt, parallelismHint, false); return builder; } private static void declarebolt(TopologyBuilder builder, String boltName, IRichBolt bolt, int parallelism, boolean control) { BoltDeclarer declarer = builder.setBolt(boltName, bolt, parallelism) .allGrouping(FLOW_LOADER_STREAM, FLOW_LOADER_STREAM) .allGrouping("tick", "tick") .localOrShuffleGrouping(INITIALIZER, boltName) .localOrShuffleGrouping(FILTER, boltName) .fieldsGrouping(PARTITION, boltName, new Fields(FLOW_ID, PARTITION)) // guaranteed partitions will always group the same flow for flows that have joins with default partitions. .localOrShuffleGrouping(AGGREGATE, boltName) .localOrShuffleGrouping(SELECT, boltName) .localOrShuffleGrouping(EACH, boltName) .localOrShuffleGrouping(SORT, boltName) .localOrShuffleGrouping(SWITCH, boltName) .localOrShuffleGrouping(SPLIT, boltName) .localOrShuffleGrouping(JOIN, boltName); if(control) { // control stream is all-grouped declarer.allGrouping(INITIALIZER, BROADCAST_STREAM + boltName) .allGrouping(FILTER, BROADCAST_STREAM + boltName) .allGrouping(PARTITION, BROADCAST_STREAM + boltName) .allGrouping(AGGREGATE, BROADCAST_STREAM + boltName) .allGrouping(SELECT, BROADCAST_STREAM + boltName) .allGrouping(EACH, BROADCAST_STREAM + boltName) .allGrouping(SORT, BROADCAST_STREAM + boltName) .allGrouping(SWITCH, BROADCAST_STREAM + boltName) .allGrouping(SPLIT, BROADCAST_STREAM + boltName) .allGrouping(JOIN, BROADCAST_STREAM + boltName); } } public static Fields fields = new Fields(FLOW_ID, EVENT, FLOW_OP_IDX, STREAM_NAME, LAST_STREAM); public static Fields partitionFields = new Fields(FLOW_ID, EVENT, FLOW_OP_IDX, STREAM_NAME, PARTITION, LAST_STREAM); public static void declareOutputStreams(OutputFieldsDeclarer declarer, Fields fields) { declarer.declareStream(PARTITION, fields); declarer.declareStream(FILTER, fields); declarer.declareStream(SELECT, fields); declarer.declareStream(AGGREGATE, fields); declarer.declareStream(SWITCH, fields); declarer.declareStream(SORT, fields); declarer.declareStream(JOIN, fields); declarer.declareStream(SPLIT, fields); declarer.declareStream(EACH, fields); declarer.declareStream(OUTPUT, fields); declarer.declareStream(BROADCAST_STREAM + PARTITION, fields); declarer.declareStream(BROADCAST_STREAM + FILTER, fields); declarer.declareStream(BROADCAST_STREAM + SELECT, fields); declarer.declareStream(BROADCAST_STREAM + AGGREGATE, fields); declarer.declareStream(BROADCAST_STREAM + SWITCH, fields); declarer.declareStream(BROADCAST_STREAM + SORT, fields); declarer.declareStream(BROADCAST_STREAM + JOIN, fields); declarer.declareStream(BROADCAST_STREAM + EACH, fields); declarer.declareStream(BROADCAST_STREAM + SPLIT, fields); } }